Developing Indicators of Habitat and Ecosystem Change in the Gulf of Maine

Study area

The focus area for this project is coastal Maine, extending from the coast to the eastern boundary of the NOAA statistical areas 511, 512, 513.

usStates <- rnaturalearth::ne_states("united states of america", returnclass = "sf")
ne_us <- usStates %>% filter(name == "Maine")
statarea <- sf::st_read(paste0(gmRi::shared.path(group = "Res_Data", folder = "Shapefiles/Statistical_Areas"), "Statistical_Areas_2010_withnames.shp"), quiet = TRUE) %>% 
  filter(Id %in% c(511, 512, 513)) %>% 
  mutate(Id = as.factor(Id))
statarea_sf <- sf::st_simplify(statarea, dTolerance = .05)
mcc_turnoff_sf <- sf::st_read(here::here("Data/Shapefiles/MCC_turnoff/MCC_turnoff.shp"), quiet = TRUE)

ggplot() +  
  geom_sf(data = statarea_sf, aes(fill = Id)) + 
  geom_sf(data= ne_us, fill = "grey") +
  geom_sf(data = mcc_turnoff_sf, fill = "transparent", color = "black") +
  scale_fill_discrete(name = "Stat area") +
  theme(panel.background = element_blank(), 
        panel.grid = element_blank(), 
        axis.title = element_blank(),
        axis.ticks = element_blank())
Study region. NOAA statistical areas are indicated as colored polygons. Maine coastal current study region indicated by the black outlined open polygon

Study region. NOAA statistical areas are indicated as colored polygons. Maine coastal current study region indicated by the black outlined open polygon

Indicators

The following indicators are used in this report

  • Surface and bottom temperature anomalies (FVCOM, OISST)
  • Surface and bottom salinity anomalies (FVCOM)
  • Maine Coastal Current Index (FVCOM)
  • Species-based lobster predator index (NOAA Trawl Survey)
  • Continuous Plankton Recordings
# Indicators
FVCOM <- read_csv(here::here("Indicators/FVCOM_stat_area_temps.csv"))
OISST <- read_csv(here::here("Indicators/OISST_stat_area_anoms.csv")) %>% filter(Date <= as.Date("2020-12-31"))
sal <- read_csv(here::here("Indicators/FVCOM_stat_area_sal.csv"))
maineCC <- read_csv(here::here("Indicators/mcc_pca_pc1_2.csv")) %>% 
  rename("Year" = yr, "Month" = mon)
species_index <- read_rds(here::here("Indicators/nmfs_trawl_lobster_predator_indices.rdata")) %>% 
  rename("Year" = year)
mcc_turnoff_subset <- read_csv("/Users/mdzaugis/Documents/EcosystemIndicators/Indicators/mcc_turnoff_subset.csv")
cprData <- read_csv(here::here("Indicators/cpr_focal_pca_timeseries_period_1961-20017.csv")) %>% 
  mutate(`First Mode` = `First Mode`*-1) %>%
  rename("Year" = year,
         "FirstMode" = `First Mode`,
         "SecondMode" = `Second Mode`) %>% 
  select(Year, FirstMode, SecondMode)

Temperature

Source: FVCOM NECOFS Monthly Means Thredds Link Methods:

  • Surface and bottom layer (1 m above bathymetry)
  • Regridded to regular 0.1 deg grid
  • Averaged over NOAA statistical area
  • Baseline climatology 1990-2020
yrOISST <- OISST %>% 
  mutate(Year = lubridate::year(Date)) %>% 
  group_by(Year, stat_area) %>% 
  summarise(temp_var = var(temp),
            temp = mean(temp),
            .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area))

yrFVCOM <- FVCOM %>% 
  group_by(Year, stat_area) %>% 
  summarise(sur_temp = mean(sur_temp_anom),
            bot_temp = mean(bot_temp_anom), 
            sur_temp_var = var(sur_temp_anom),
            bot_temp_var = var(bot_temp_anom),
            .groups="drop") %>% 
  mutate(stat_area = as.factor(stat_area))

tPlot1 <- yrOISST %>% 
  ggplot() +
  geom_line(aes(Year, temp, col = stat_area)) + 
  theme_bw()  +
  labs(y = "SST anomalies (deg C)")

tPlot2 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, bot_temp, col = stat_area)) + 
  theme_bw() +
  labs(y = "Bottom T anomalies (deg C)")

tPlot1/tPlot2

Salinity

Source: FVCOM NECOFS Monthly Means Thredds Link Methods:

  • Surface and bottom layer (1 m above bathymetry)
  • Regridded to regular 0.1 deg grid
  • Averaged over NOAA statistical area for each year
  • Baseline climatology 1990-2017 (last year of data)
yrSal <- sal %>% 
  group_by(Year, stat_area) %>% 
  summarise(sur_sal = mean(sur_sal_anom),
            bot_sal = mean(bot_sal_anom),
            sur_sal_var = var(sur_sal_anom),
            bot_sal_var = var(bot_sal_anom), .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area))

sPlot1 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, sur_sal, col = stat_area)) + 
  theme_bw() +
  labs(y = "SSS anomalies (deg C)")

sPlot2 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, bot_sal, col = stat_area)) + 
  theme_bw() +
  labs(y = "Bottom S anomalies (deg C)")

sPlot1/sPlot2

Maine Coastal Current Index

Source: FVCOM NECOFS Monthly Means Thredds Link Methods:

  • Surface layer
  • Crop to Maine Coastal Current interest area
  • Regridded to regular 0.1 deg grid
  • Averaged over NOAA statistical area for each year
  • See MCC_index_report.Rmd for details
  • June-September index
yrMCC <- maineCC %>% 
  filter(Month %in% c(6,7,8,9)) %>% 
  group_by(Year) %>% 
  summarise(MCCPC1 = mean(PC1))

MCCPC_plot <- yrMCC %>% 
  mutate(c = ifelse(MCCPC1 > 0, 1, 2)) %>% 
  ggplot() + geom_col(aes(x = Year, y = MCCPC1, fill = as.factor(c))) + theme_bw() + theme(legend.position = "none") + labs(x = "Year", y = "Maine Coastal Current PC1")

MCCPC_maps <- mcc_turnoff_subset %>% 
  mutate(Year = lubridate::year(as.Date(Date)),
         Month = lubridate::month(as.Date(Date))) %>% 
  filter(Year %in% c(1980, 1983, 1990, 2011, 2000, 2016),
         Month %in% c(6,7,8,9)) %>% 
  group_by(Year, lat, lon) %>% 
  summarise(u = mean(u),
            v = mean(v), .groups = "drop") %>% 
  mutate(vel = sqrt(u^2+v^2), 
         PC = if_else(Year == 2010, "negative PC1", if_else(Year ==2013, "positive PC1", "neutral PC1"))) %>% 
  ggplot() + geom_sf(data= ne_us, fill = "grey") + 
  geom_segment(aes(x = lon, y = lat, xend=lon+u, yend=lat+v, color = vel), 
               arrow = arrow(angle = 10, length = unit(0.05, "inches"), type = "closed")) + 
  scale_color_viridis_c() + theme_bw() + 
  coord_sf(datum = "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0") + 
  facet_wrap(~Year, nrow = 1)+ 
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        axis.title = element_blank(), axis.text = element_blank(),
        axis.ticks = element_blank())

MCCPC_plot / MCCPC_maps

Maine Coastal Current full year index is highly correlated to salinity. Years where there is a strong offshore current are also years with higher salinity. When the downeast current is stronger, the water tends to be fresher.

fullyrMCC <- maineCC %>% 
  group_by(Year) %>% 
  summarise(MCCPC1 = mean(PC1))

MCCPC_plot2 <- fullyrMCC %>% 
  mutate(c = ifelse(MCCPC1 > 0, 1, 2)) %>% 
  ggplot() + geom_col(aes(x = Year, y = MCCPC1, fill = as.factor(c))) + theme_bw() + theme(legend.position = "none") + labs(x = "Year", y = "Maine Coastal Current PC1")

MCCPC_maps2 <- mcc_turnoff_subset %>% 
  mutate(Year = lubridate::year(as.Date(Date)),
         Month = lubridate::month(as.Date(Date))) %>% 
  filter(Year %in% c(1980, 1987, 2000,2012)) %>% 
  group_by(Year, lat, lon) %>% 
  summarise(u = mean(u),
            v = mean(v), .groups = "drop") %>% 
  mutate(vel = sqrt(u^2+v^2), 
         PC = if_else(Year == 2010, "negative PC1", if_else(Year ==2013, "positive PC1", "neutral PC1"))) %>% 
  ggplot() + geom_sf(data= ne_us, fill = "grey") + 
  geom_segment(aes(x = lon, y = lat, xend=lon+u, yend=lat+v, color = vel), 
               arrow = arrow(angle = 10, length = unit(0.05, "inches"), type = "closed")) + 
  scale_color_viridis_c() + theme_bw() + 
  coord_sf(datum = "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0") + 
  facet_wrap(~Year, nrow = 1)+ 
  theme(panel.background = element_blank(), panel.grid = element_blank(), 
        axis.title = element_blank(), axis.text = element_blank(),
        axis.ticks = element_blank())

MCCPC_plot2 /sPlot1 / MCCPC_maps2 

Species Based Predator Index

Source: NOAA NE Fisheries Trawl Survey Methods:

  • Filtered to 15 lobster predators (ASMFC 2020)
  • Cropped to NOAA stat areas 511, 512, 513
  • Stratum abundance: abundance per trawl summed over stat area
  • Stratified abundance: abundance / km2 multiplied by the area of the strata
yrSpecies_index <- 
  species_index %>% 
  filter(season == "Both",
         `stratum id` != 'Strata 511-513') %>% 
  dplyr::select(Year, `stratum id`, "predators" = `stratified biomass (kg)`)

yrSpecies_index %>% 
  ggplot() +
  geom_line(aes(Year, predators, color = `stratum id` )) + theme_bw()

CPR data

Source: Continuous Plankton Recording

  • First Mode = smaller zooplankton species explains ~ 50% of variance
  • Second Mode = Calanus explains ~ 26% of variance
cprData %>% 
  pivot_longer(cols = c(FirstMode, SecondMode), 
               names_to = "Mode", values_to = "values") %>% 
  ggplot() +
  geom_line(aes(Year, values, col = Mode)) +
  theme_bw() +
  scale_color_discrete(label = c("Small spp", "Calanus"))

# area weighted
# regional time series 
# area weighted average 

Indicators PCA

allIndex <- left_join(yrFVCOM, yrSal, by = c("Year", "stat_area")) %>% 
  left_join(yrOISST, by = c("Year", "stat_area")) %>% 
  left_join(yrMCC, by = c("Year" = "Year")) %>% 
  left_join(yrSpecies_index, by = c("Year" = "Year",
                                    "stat_area" = "stratum id")) %>% 
  left_join(cprData, by = "Year") %>% 
  na.omit() %>% 
  select(-sur_temp, -sur_sal, -sur_sal_var, -bot_sal_var, -sur_temp_var, -bot_temp_var, -temp_var, -temp)

Previous analysis shows surface and bottom temperature and surface and bottom salinity load similarly in a PCA. To reduce variables only bottom temperature and salinity are used in the following PCA.

  • Data: Bottom temp, bottom salinity, MCC, Predator Index, CPR data mode one and two
  • Method: Principal components analysis
  • Years: 1990-2016

PCA Summary

For each stat area, the first two principal components have eigenvalues greater than one and were retained for further analysis. PC1 and PC2 explain 29.7% and 24% of the variability, respectively, in stat area 511, 26.5% and 23.2% in stat area 512, and 29.6% and 26% in stat area 513.

# PCA by stat area
indicator_pca <- allIndex %>% 
  nest(data = c(-stat_area, -Year),
       Year = Year) %>% 
  mutate(pca = map(data, ~prcomp(.x, scale. = TRUE, center = TRUE)),
         pca_index = map(pca, ~tibble("PC1" = .x$x[,1],
                                      "PC2" = .x$x[,2],
                                      "PC3" = .x$x[,3])))

index_pca <- indicator_pca %>% 
  unnest(c(pca_index, Year, data)) %>% 
  mutate(PC2 = if_else(stat_area == "511", PC2*-1, PC2),
         PC1 = if_else(stat_area == "512", PC1*-1, PC1),
         PC3 = if_else(stat_area == "513", PC3*-1, PC3))

PCA511 <- data.frame(summary(indicator_pca$pca[[1]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "511")
PCA512 <- data.frame(summary(indicator_pca$pca[[2]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "512")
PCA513 <- data.frame(summary(indicator_pca$pca[[3]])$importance) %>% 
  rownames_to_column("Result") %>%  mutate(stat_area = "513")

PCA_table <- bind_rows(PCA511,PCA512,PCA513)

knitr::kable(PCA_table)
Result PC1 PC2 PC3 PC4 PC5 PC6 stat_area
Standard deviation 1.334865 1.199165 0.9960424 0.9530443 0.7204248 0.6006102 511
Proportion of Variance 0.296980 0.239670 0.1653500 0.1513800 0.0865000 0.0601200 511
Cumulative Proportion 0.296980 0.536640 0.7019900 0.8533800 0.9398800 1.0000000 511
Standard deviation 1.260536 1.179767 1.0520613 0.9527912 0.7602674 0.6531084 512
Proportion of Variance 0.264820 0.231970 0.1844700 0.1513000 0.0963300 0.0710900 512
Cumulative Proportion 0.264820 0.496800 0.6812700 0.8325700 0.9289100 1.0000000 512
Standard deviation 1.332878 1.248477 1.0304501 0.9131648 0.6932772 0.5370390 513
Proportion of Variance 0.296090 0.259780 0.1769700 0.1389800 0.0801100 0.0480700 513
Cumulative Proportion 0.296090 0.555880 0.7328500 0.8718300 0.9519300 1.0000000 513
scree1 <- fviz_eig(indicator_pca$pca[[1]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree2 <- fviz_eig(indicator_pca$pca[[2]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree3 <- fviz_eig(indicator_pca$pca[[3]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))

#scree1 / scree2 / scree3

PC time series

Below are the time series for PC1, PC2, and PC3. PC1 and PC2 time series are highly correlated between stat areas.

PC1plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC1, color = stat_area))

PC2plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC2, color = stat_area))

PC3plot <- index_pca %>% 
  ggplot() + 
  geom_line(aes(Year, PC3, color = stat_area))

pc1cors <- index_pca %>% 
  select(stat_area, PC1, PC2, PC3, Year) %>% 
  pivot_wider(names_from = stat_area, values_from = c(PC1, PC2, PC3)) %>% 
  summarise("511_512" = corrr::correlate(PC1_511, PC1_512)[[2]],
         "511_513" = corrr::correlate(PC1_511, PC1_513)[[2]],
         "512_513" = corrr::correlate(PC1_512, PC1_513)[[2]])

pc2cors <- index_pca %>% 
  select(stat_area, PC1, PC2, PC3, Year) %>% 
  pivot_wider(names_from = stat_area, values_from = c(PC1, PC2, PC3)) %>% 
  summarise("511_512" = corrr::correlate(PC2_511, PC2_512)[[2]],
         "511_513" = corrr::correlate(PC2_511, PC2_513)[[2]],
         "512_513" = corrr::correlate(PC2_512, PC2_513)[[2]])

pc3cors <- index_pca %>% 
  select(stat_area, PC1, PC2, PC3, Year) %>% 
  pivot_wider(names_from = stat_area, values_from = c(PC1, PC2, PC3)) %>% 
  summarise("511_512" = corrr::correlate(PC3_511, PC3_512)[[2]],
         "511_513" = corrr::correlate(PC3_511, PC1_513)[[2]],
         "512_513" = corrr::correlate(PC3_512, PC3_513)[[2]])

PC1plot / PC2plot / PC3plot

Loadings plot

Indicators load similarly for each stat area. The Maine coastal current, first mode (small zooplankton), and second mode (Calanus) tend to load together. The lobster predator index has the most variable relationship to PC1 and PC2 with respect to stat area. In stat area 512 and 513, predators loads with bottom temperature. In area 511, predators load separated from the rest of the indicators and has a stronger influence on PC2 than PC1. Bottom salinity and the plankton FirstMode (Calanus) tend to load similarly for each stat area.

Interpretation:

Stat area 511:

  • PC1: Bottom sal and temp negatively correlated to MCC and small zooplankton
  • PC2: Bottom sal and Calanus positively correlated and negatively correlated to lob predators

Stat area 512:

  • PC1: Bottom temp is negatively correltated with MCC and Calanus
  • PC2: most strongly influenced by bottom salinity

Stat area 513:

  • PC1: Most influenced by bottom temp and predators
  • PC2: Bottom salinity, MCC, Calanus all load strongly
Figure 4. Loadings of the variables

Figure 4. Loadings of the variables

Indicator PC1_511 PC1_512 PC1_513 PC2_511 PC2_512 PC2_513 PC3_511 PC3_512 PC3_513
bot_temp 0.619 0.586 0.655 0.029 -0.410 -0.136 0.382 -0.013 0.190
bot_sal 0.403 0.107 0.372 -0.474 -0.717 -0.506 0.442 -0.014 0.182
MCCPC1 -0.471 -0.488 -0.222 -0.133 -0.208 -0.541 0.284 0.472 0.519
predators -0.118 0.220 0.603 0.522 -0.204 0.166 0.590 0.604 -0.143
FirstMode -0.059 -0.237 0.041 -0.632 -0.363 -0.366 -0.102 -0.637 -0.747
SecondMode -0.463 -0.550 -0.138 -0.290 -0.317 -0.521 0.468 0.080 -0.288
Figure 4. Loadings of the variables

Figure 4. Loadings of the variables

Chronolgical cluster

The cluster plot groups years that are most similar based on PC1 and PC2. For this analysis stat areas were grouped together.

allIndex_ca <- allIndex %>% 
  group_by(Year) %>% 
  pivot_wider(names_from = stat_area,
              values_from = c(bot_temp, bot_sal, MCCPC1, predators, FirstMode, SecondMode)) %>% 
  column_to_rownames("Year")

allIndex_ca <- scale(allIndex_ca)

# determine number of clusters
wss <- (nrow(allIndex_ca)-1)*sum(apply(allIndex_ca,2,var)) # get sum of squares

for (i in 2:12) wss[i] <- sum(kmeans(allIndex_ca,
   centers=i)$withinss)

# look for break in plot like a scree plot
kmeans_scree <- ggplot() + 
  geom_line(aes(x = c(1:12), y = wss)) +
  labs(x = "Number of Clusters",
  y ="Within groups sum of squares")

# K-Means Cluster Analysis
fit <- kmeans(allIndex_ca, 3) # 4 cluster solution

# get cluster means
#aggregate(allIndex_ca,by=list(fit$cluster),FUN=mean)

# append cluster assignment
lobData_cluster <- data.frame(allIndex_ca, fit$cluster)

# Cluster Plot against 1st 2 principal components

# vary parameters for most readable graph
indicators_cluster <- cluster::clusplot(allIndex_ca, fit$cluster, color=TRUE, shade=TRUE,
   labels=2, lines=0)

indicators_cluster
## $Distances
## NULL
## 
## $Shading
## [1] 30.698422  9.156137  6.145441

Breakpoint Analysis

The breakpoint analysis estimates a change in the linear relationship in the data. The location of the break indicates there may be a difference in the relationship of the variable before and after that point. We find that breakpoints change depending on the variable.

Breakpoint of indicators
# breapoint function
bp_analysis <- function(x){
  mod <-  lm(values ~ Year, data = x)
  o <- tryCatch(segmented::segmented(mod, seg.Z = ~Year, psi = c(2005)),  # need to estimate bp
                error = function(cond){cond})
}

set.seed(25)
# Breakpoint by stat area
indicator_breakpoint <- allIndex %>%
  pivot_longer(cols = c(bot_temp, bot_sal, MCCPC1, predators, FirstMode, SecondMode), 
               names_to = "Indicator", values_to = "values") %>% 
  nest(data = c(-stat_area, -Indicator)) %>% 
  mutate(seg = purrr::map(data, ~bp_analysis(.x)),
         bp = purrr::map(seg, ~data.frame(.x$psi)))

# get fitted values and plot over data
bp_plots <- list()
for(i in 1:length(indicator_breakpoint$data)){
  
  name <- paste0(indicator_breakpoint$stat_area[[i]], "_", indicator_breakpoint$Indicator[[i]])
  
    # get the fitted data
  fitted_df <- fitted(indicator_breakpoint$seg[[i]])
  
  if(is.null(fitted_df)){
    
    "check error log"
    
  } else {
      
    seg_model <- data.frame(Year = indicator_breakpoint$data[[i]]$Year, values = fitted_df)
  
  # plot the fitted model
  
    bp_plots[[name]] <- indicator_breakpoint$data[[i]] %>% 
      ggplot() + geom_line(aes(Year, values)) +
      geom_line(data = seg_model, aes(x = Year, y = values), col = "dark red")
  }
}



# extract the breakpoints
break_years <- indicator_breakpoint %>% 
  unnest(bp) %>% 
  select(stat_area, Indicator, Est.) %>% 
  mutate(Indicator = factor(Indicator, levels = c("MCCPC1", "predators", "bot_sal", "bot_temp", "FirstMode", "SecondMode")))

# plot break points
ggplot(break_years) +
  geom_point(aes(Est., Indicator, color = stat_area, shape = stat_area), size = 3) +
  scale_color_grey(name = "Stat area") +
  scale_shape(name = "Stat area") +
  labs(x = "Estimated breakpoint")

Breakpoint of PC1 and PC2
set.seed(8)
# Breakpoint by stat area
pc1_breakpoint <- index_pca %>%
  select(stat_area, Year, PC1, PC2)  %>%
  pivot_longer(cols = c(PC1, PC2), 
               names_to = "Indicator", values_to = "values") %>% 
  nest(data = c(-stat_area, -Indicator))%>% 
  mutate(seg = purrr::map(data, ~bp_analysis(.x)),
         bp = purrr::map(seg, ~data.frame(.x$psi))) %>% 
  unnest(bp) %>% 
  select(stat_area, Indicator, Est., St.Err)

knitr::kable(pc1_breakpoint)
stat_area Indicator Est. St.Err
511 PC1 2007.000 1.776786
511 PC2 1997.713 2.730764
512 PC1 2007.000 3.017780
512 PC2 1998.000 3.466467
513 PC1 1995.149 3.778576
513 PC2 1988.000 7.077578

Lobster Data

ALSI <- read_csv(here::here("Biological_data/SettlementIndex.csv"))
ME_landings <- read_csv(here::here("Biological_data/ME_lob_landings_1950_2019.csv"))
ASFMCindicies <- read_csv(here::here("Biological_data/GOMGBK_indices.csv")) %>% 
  rename("lob_index" = Index, "name" = Survey)
GOMindex <- ASFMCindicies %>% 
  filter(name %in% c("MeFQ2", "MeMQ2"))
NEFSCindex <- ASFMCindicies %>% 
  filter(name %in% c("NefscFQ2", "NefscMQ2"))
sublegal <- readxl::read_excel(here::here("Biological_data/RawData.xlsx")) %>% 
  rename("Year" = year, "Month" = month, "stat_area" = `stat area`)
MEDMR_trawl <- read_csv(paste0(gmRi::box_path("Res_Data", "Maine_NH_Trawl"), "MaineDMR_Trawl_Survey_Catch_Data_2021-05-14.csv")) %>% 
  filter(Common_Name == "Lobster American")
NEFSC_meIndex <- read_csv(here::here("Biological_data/nmfs_trawl_lobster_indices.csv")) %>% 
  rename("Year" = est_year, "stat_area" = lobster_strata) %>% 
  mutate(stat_area = as.factor(stat_area))

ALSI index

Source: American Lobster Settlement Index data portal Sites:

  • Jonesport, Length: 2002-2018, stat area: 511
  • Mt. Desert Island, Length: 2000-2018, stat area: 512
  • Outer Penobscot Bay, Length: 2000-2018, stat area: 512
  • Mid-coast, Length: 1989-2018, stat area: 513
  • Casco Bay, Length: 2000-2018, stat area: 513
  • York, Length: 2000-2018, stat area: 513

Methods:

  • Sites grouped by NOAA stat area
  • Time series cropped to shortest length
  • Averaged by stat area
statKey <- data.frame("Location" = c('Jonesport',
'Mt. Desert Island',
'Outer Pen Bay',
'Mid-coast',
'Casco Bay',
'York'), "stat_area" = c(511,512,512,513,513,513))

ALSI <- ALSI %>% 
  left_join(statKey, by = "Location") %>% 
  filter(!is.na(stat_area),
         Year >= 2000) %>% 
  group_by(stat_area, Year) %>% 
  summarise(lob_index = mean(Yoy_density), .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area),
         name = "ALSI") %>% 
  na.omit()

ALSI_cors <- ALSI %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])

ALSI_plot <- ALSI %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "ALSI yoy density")

ALSI_plot

Sublegal index

Source: Ventless trap survey

Calculate stratified means

  • Calculate catch per unit effort for each site for each year
  • Multiply cpue by the depth strata area factor
  • Group by stat area, sum the outputs
strata_area <- tibble("stat_area" = c(511,512,513),
                                 "1" = c(122, 566, 315),
                                 "2" = c(82, 395, 338),
                                 "3" = c(92, 420, 198)) %>% 
  pivot_longer(cols = c("1", "2", "3"), names_to = "depth stratum", values_to = "strata_area") %>% 
  group_by(stat_area) %>% 
  mutate(`depth stratum` = as.numeric(`depth stratum`),
         total = sum(strata_area)) 

# average number of lobsters per trap haul at each site
u <- sublegal  %>% 
  group_by(Year, `site ID`, `effort ID`, stat_area, `depth stratum`) %>% 
  summarise(n_lob = sum(`sample ID` != 0), .groups = "drop") %>% 
  group_by(Year, `site ID`, stat_area, `depth stratum`) %>% 
  summarise(n_lob_u = mean(n_lob), .groups = "drop")

# average number of lobsters per trap haul at each depth stratum within a stat area
v <- sublegal  %>% 
  group_by(stat_area, `depth stratum`, Year, `site ID`, `effort ID`) %>% 
  summarise(n_lob = sum(`sample ID` != 0), .groups = "drop") %>% 
  group_by(stat_area, `depth stratum`, Year) %>% 
  summarise(n_lob_v = mean(n_lob), .groups = "drop")

# choose the relevant v which corresponds to the depth strata that u is in ("stat_area", "depth stratum", "Year"). Each site will have one w value
w <- left_join(v, u, by = c("stat_area", "depth stratum", "Year")) %>% 
  mutate(w = (n_lob_v-n_lob_u)^2)

#sum of all w within the same depth strata in a stat area
x <- w %>% 
  group_by(`depth stratum`, stat_area, Year) %>% 
  summarise(x = sum(w), .groups = "drop")

#number of sites per depth stratum within a given stat area
y <- sublegal %>% 
  group_by(stat_area, `depth stratum`, Year) %>% 
  summarise(site_ID = unique(`site ID`)) %>% 
  summarise(y = n(), .groups = "drop")

#if done correctly, each stat area per year should have three z values, one for each depth stratum
z <- left_join(x, y, by = c("stat_area", "depth stratum", "Year")) %>% 
  mutate(z = x/(y-1))

# Calculate stat area variance 
sublegal_vari <- left_join(z, strata_area, by = c("stat_area", "depth stratum")) %>% 
  mutate(a = 1/(total^2),
         b = strata_area*(strata_area-y)*(z/y)) %>% 
  group_by(stat_area, Year, a) %>% 
  summarise(stat_sum = sum(b), .groups = "drop") %>% 
  mutate(vari = a*stat_sum,
         sd = sqrt(vari))
strata_area_factor <- tibble("stat_area" = c(511,512,513),
                                 "1" = c(0.412162162, 0.409847936, 0.370152761),
                                 "2" = c(0.277027027, 0.28602462, 0.397179788),
                                 "3" = c(0.310810811, 0.304127444, 0.23266745)) %>% 
  pivot_longer(cols = c("1", "2", "3"), names_to = "depth stratum", values_to = "strata_scale") %>% 
  mutate(`depth stratum` = as.numeric(`depth stratum`))

sublegal_cpue <- sublegal %>% 
  group_by(`trip ID`, `trip date`, Year, Month, `site ID`, 
           `depth stratum`, stat_area,
           `soak nights`, depth, `depth unit`, 
           `latitude (dd)`, `longitude (dd)`, `effort ID`) %>% 
  mutate(lob_count = sum(`sample ID` != 0), 
            lob_effort = lob_count/`soak nights`) %>% 
  group_by(`trip ID`, `trip date`, Year, Month, 
           `site ID`, `depth stratum`, stat_area,
           `soak nights`, depth, `depth unit`, 
           `latitude (dd)`, `longitude (dd)`) %>% 
  summarise(cpue = sum(lob_effort)/sum(!is.na(unique(`effort ID`))), .groups = "drop") %>% 
  left_join(strata_area_factor, by = c("stat_area", "depth stratum")) %>% 
  mutate(stratified_cpue = cpue*strata_scale,
         stat_area = as.factor(stat_area)) %>% 
  group_by(stat_area, Year) %>% 
  summarise(lob_index = sum(stratified_cpue), .groups = "drop") %>% 
  mutate(name = "sublegal_cpue")

sub_leagal_cors <- sublegal_cpue %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])
            
sublegal_plot <- sublegal_cpue %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "Sublegal lobster cpue")

sublegal_var_plot <- sublegal_vari %>% 
  ggplot() +
  geom_line(aes(Year, vari, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "Sublegal lobster variance")

sublegal_plot

ME-NH Trawl Survey

Source: ME-NH Trawl Survey

Calculate stratified means

  • Calculate catch per unit effort for each site for each year
  • Multiply cpue by the depth strata area factor
  • Group by stat area, sum the outputs
stat_area_trawl_key <- tibble("stat_area" = c(511, 512, 512, 513, 513),
                              "Region" = c(5,4,3,2,1))

DMR_strata_area <- tibble("Stratum" = c("1", "2", "3", "4"),
                          "1" = c(253.27, 214.22, 227.35, 225.65),
                          "2" = c(279.63, 191.23, 211.66, 263.49),
                          "3" = c(259.62, 262.90, 280.03, 183.69),
                          "4" = c(205.30, 206.12, 310.49, 170.72),
                          "5" = c(138.54, 220.49, 365.04, 196.11)) %>% 
  pivot_longer(cols = c("1", "2", "3", "4", "5"), names_to = "Region", values_to = "strata_area") %>% 
  group_by(Region) %>% 
  mutate(Stratum = as.numeric(Stratum),
         total = sum(strata_area),
         Region = as.numeric(Region)) %>% 
  left_join(stat_area_trawl_key) %>% 
  group_by(stat_area, Stratum) %>% 
  summarise(strata_area = sum(strata_area),
            total = sum(total))

# average number of lobsters per tow 
u <- MEDMR_trawl %>% 
  left_join(stat_area_trawl_key) %>% 
  group_by(Season, Year, Tow_Number, stat_area, Stratum) %>% 
  summarise(n_lob_u = sum(Expanded_Weight_kg, na.rm = TRUE), .groups = "drop")

# average number of lobsters per trap haul at each depth stratum within a stat area
v <- MEDMR_trawl %>%
  left_join(stat_area_trawl_key) %>%  
  group_by(stat_area, Stratum, Year, Season) %>% 
  summarise(n_lob_v = mean(Expanded_Weight_kg, na.rm = TRUE), .groups = "drop")

# choose the relevant v which corresponds to the depth strata that u is in ("stat_area", "depth stratum", "Year"). Each site will have one w value
w <- left_join(v, u, by = c("stat_area", "Stratum", "Year", "Season")) %>% 
  mutate(w = (n_lob_v-n_lob_u)^2)

#sum of all w within the same depth strata in a stat area
x <- w %>% 
  group_by(Stratum, stat_area, Year, Season) %>% 
  summarise(x = sum(w), .groups = "drop")

#number of sites per depth stratum within a given stat area
y <- MEDMR_trawl %>%
  left_join(stat_area_trawl_key) %>% 
  group_by(stat_area, Stratum, Year, Season) %>% 
  summarise(Tow_number = unique(Tow_Number, na.rm = TRUE)) %>% 
  summarise(y = n(), .groups = "drop")

#if done correctly, each stat area per year should have three z values, one for each depth stratum
z <- left_join(x, y, by = c("stat_area", "Stratum", "Year", "Season")) %>% 
  mutate(z = x/(y-1))

# Calculate stat area variance 
MEDMR_vari <- left_join(z, DMR_strata_area, by = c("stat_area", "Stratum")) %>% 
  mutate(a = 1/(total^2),
         b = strata_area*(strata_area-y)*(z/y)) %>% 
  group_by(stat_area, Year, a, Season) %>% 
  summarise(stat_sum = sum(b), .groups = "drop") %>% 
  mutate(vari = a*stat_sum,
         sd = sqrt(vari))

medmr_vari_plot <- MEDMR_vari %>% 
  ggplot() + 
  geom_line(aes(Year, vari, col = as.factor(stat_area))) +
  facet_wrap(~Season)
MEDMR_cpue <- MEDMR_trawl %>%
  left_join(stat_area_trawl_key) %>%  
  group_by(Year, Season, stat_area, Stratum)%>%
  mutate(tows=n_distinct(Tow_Number))%>%
  group_by(Year, Season, tows, stat_area, Stratum) %>%
  summarise(weight = sum(Expanded_Weight_kg,na.rm=TRUE), 
            catch=sum(Expanded_Catch,na.rm=TRUE))%>%
  mutate(weight_tow = weight/tows, 
         catch_tow = catch/tows) %>% 
  left_join(DMR_strata_area) %>% 
  mutate(stratified_wpue = weight_tow*(strata_area/total),
         stratified_cpue = catch_tow*(strata_area/total)) %>% 
  group_by(stat_area, Year, Season) %>% 
  summarise(lob_cpue = sum(stratified_cpue), 
            lob_index = sum(stratified_wpue),
            .groups = "drop") %>% 
  mutate(stat_area = as.factor(stat_area),
         name = "ME-NH_trawl")

MEDMR_cors <- MEDMR_cpue %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area, Season) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]])
            

MEDMR_cpue %>% 
  ggplot() +
  geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
  scale_color_discrete(name = "Stat area") +
  labs(y = "ME-NH Trawl lobster cpue") +
  facet_wrap(~Season)

ME yearly landings

Yearly Maine lobster landings in pounds from 1950-2020.

Use a breakpoint in the the landings and use that as the point to run the regression.

ME_landings_cors <- ME_landings %>% 
  left_join(index_pca, by = c("Year")) %>% 
  group_by(stat_area) %>% 
  summarise(corPC1 = corrr::correlate(Pounds, PC1)[[2]],
            corPC2 = corrr::correlate(Pounds, PC2)[[2]],
            corPC3 = corrr::correlate(Pounds, PC3)[[2]]) 

ME_pounds <- ME_landings %>% 
  mutate(name = "ME_landings") %>% 
  select(Year, "lob_index" = Pounds, name) 

ME_pounds %>% 
  ggplot() +
  geom_line(aes(Year, lob_index)) +
  labs(y = "Maine lobster landings (pounds)")

Nefsc index

ASFMC lobster abundance index based on the NE fisheries trawl survey. Time spans 1978-2018 and indices are split into seasons and sex.

NEFSC_cors <- NEFSCindex %>% 
  left_join(index_pca, by = "Year") %>% 
  group_by(stat_area, name) %>% 
  summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
            corPC2 = corrr::correlate(lob_index, PC2)[[2]],
            corPC3 = corrr::correlate(lob_index, PC3)[[2]], .groups = "drop")

NEFSCindex %>% 
  ggplot() +
  geom_line(aes(Year, lob_index)) +
  facet_wrap(~name) +
  labs(y = "Nefsc trawl lob abundance index")

NMFS stat area 511-513

Lobster biomass and abundance for statistical areas 511-513.

Offshore trawl increasing faster than landings - relative exploitation declining.

Impose break with the biological data

Lobster assessment

NEFSC_meIndex_cors <- NEFSC_meIndex %>% 
  left_join(index_pca, by = c("Year", "stat_area")) %>% 
  group_by(stat_area) %>% 
  summarise(corPC1 = corrr::correlate(`stratified biomass (kg)`, PC1)[[2]],
            corPC2 = corrr::correlate(`stratified biomass (kg)`, PC2)[[2]],
            corPC3 = corrr::correlate(`stratified biomass (kg)`, PC3)[[2]], .groups = "drop")

nmfsBIO <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, `stratified biomass (kg)`, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "Lobster biomass") +
  scale_color_discrete(name = "Stat area")

nmfsBIO_var <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, bio_var, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "variance") +
  scale_color_discrete(name = "Stat area")

nmfsNUM <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, `stratified abundance`, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "Lobster abundance") +
  scale_color_discrete(name = "Stat area")

nmfsNUM_var <- NEFSC_meIndex %>% 
  ggplot() +
  geom_line(aes(Year, abun_var, col = as.factor(stat_area))) +
  facet_wrap(~season) +
  labs(y = "variance") +
  scale_color_discrete(name = "Stat area")

nmfsBIO / nmfsNUM 

Biological data analysis

xyPlot <- function(x){
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    ggplot() +
    geom_point(aes(PC1, lob_index, color = stat_area)) + 
    geom_smooth(aes(PC1, lob_index, color = stat_area), method = "lm", se = FALSE) +
    facet_wrap(~name)
}

aicModel <- function(x){
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, PC1, PC2, PC3, name) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ PC1 + PC2 + PC3, data = .x), direction = "both", trace = 0)),
           stp = purrr::map(aic, broom::glance),
           index = purrr::map(aic, ~as.character(.x$call$formula)),
           model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]])) %>% 
    select(stat_area, stp, model, name) %>% 
    unnest(c(stp, model))
}


aicModel_indicators <- function(x){
  x %>% 
    left_join(index_pca) %>% 
    na.omit() %>% 
    select(stat_area, lob_index, bot_temp, bot_sal, MCCPC1, predators, FirstMode, SecondMode, name) %>% 
    group_by(stat_area, name) %>% 
    nest() %>% 
    mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ bot_temp + bot_sal + MCCPC1 + predators + FirstMode + SecondMode, data = .x), direction = "both", trace = 0)),
           stp = purrr::map(aic, broom::glance),
           index = purrr::map(aic, ~as.character(.x$call$formula)),
           model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]])) %>% 
    select(stat_area, stp, model, name) %>% 
    unnest(c(stp, model))
}

##################################################

# break out trawl survey by stat area
# see what groups together to see if there is a distinction in the life history patterns
# Look at coefficients to see if PC1 is always the biggest

ALSI index

The ALSI index is not significantly correlated with PC1 or PC

Correlation table

ALSI_cors %>% 
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 ALSI -0.4288498 0.2932412 -0.0992856
512 ALSI -0.3578807 0.2870086 -0.0725102
513 ALSI -0.5299711 -0.5477971 -0.5985873

Scatter plot

xyPlot(ALSI)

AIC lm model selection table

aicSel <- aicModel(ALSI)
aicSel2 <- aicModel_indicators(ALSI)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.1839122 0.1211362 0.3814058 2.929658 0.1107031 1 -5.752451 17.50490 19.62905 1.891115 13 15 lob_index ~ PC1 ALSI
512 0.1280786 0.0699505 0.3335957 2.203385 0.1584172 1 -4.395033 14.79007 17.28971 1.669291 15 17 lob_index ~ PC1 ALSI
513 0.6730608 0.5976133 0.3180746 8.920916 0.0017959 3 -2.368735 14.73747 18.90354 1.315229 13 17 lob_index ~ PC1 + PC2 + PC3 ALSI
knitr::kable(aicSel2)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.3579915 0.2509901 0.3521034 3.345670 0.0700237 2 -3.953047 15.90610 18.73830 1.487722 12 15 lob_index ~ predators + FirstMode ALSI
512 0.3815809 0.2932354 0.2908069 4.319186 0.0345923 2 -1.475002 10.95000 14.28286 1.183962 14 17 lob_index ~ bot_sal + FirstMode ALSI
513 0.6552601 0.6060116 0.3147379 13.305165 0.0005787 2 -2.819371 13.63874 16.97160 1.386839 14 17 lob_index ~ FirstMode + SecondMode ALSI

Subleagal index

Correlation table

sub_leagal_cors %>%  
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 sublegal_cpue 0.5073787 0.4448504 0.6360763
512 sublegal_cpue 0.6492143 -0.2746007 0.1923249
513 sublegal_cpue 0.4573274 0.5832219 0.3604273

Scatter plot

xyPlot(sublegal_cpue)

AIC lm model selection table

aicSel <- aicModel(sublegal_cpue)
aicSel2 <- aicModel_indicators(sublegal_cpue)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.6262736 0.4661051 1005.722 3.910093 0.0625067 3 -89.17048 188.3410 190.3304 7080344 7 11 lob_index ~ PC1 + PC2 + PC3 sublegal_cpue
512 0.4214792 0.3571991 3148.115 6.556917 0.0306545 1 -103.10479 212.2096 213.4033 89195644 9 11 lob_index ~ PC1 sublegal_cpue
513 0.4610751 0.3263439 1371.502 3.422185 0.0843554 2 -93.31711 194.6342 196.2258 15048151 8 11 lob_index ~ PC1 + PC2 sublegal_cpue
knitr::kable(aicSel2)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.8429002 0.6858004 771.5305 5.365380 0.0444765 5 -84.40395 182.8079 185.5932 2976297 5 11 lob_index ~ bot_temp + bot_sal + MCCPC1 + predators + FirstMode sublegal_cpue
512 0.8129475 0.7327821 2029.7621 10.140880 0.0060908 3 -96.89482 203.7896 205.7791 28839538 7 11 lob_index ~ bot_sal + MCCPC1 + FirstMode sublegal_cpue
513 0.6187481 0.5234352 1153.5554 6.491752 0.0211275 2 -91.41347 190.8269 192.4185 10645520 8 11 lob_index ~ FirstMode + SecondMode sublegal_cpue

ME yearly landings

Correlation table

ME_landings_cors %>%  
  na.omit() %>% 
  knitr::kable()
stat_area corPC1 corPC2 corPC3
511 0.5236554 0.1715022 0.1289351
512 0.5158596 -0.0119708 -0.0324557
513 0.5247501 0.3887277 0.0080890

Scatter plot

xyPlot(ME_pounds)

AIC lm model selection table

aicSel <- aicModel(ME_pounds)
aicSel2 <- aicModel_indicators(ME_pounds)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.2742149 0.2528683 31386881 12.84583 0.0010472 1 -671.4814 1348.963 1353.713 3.349463e+16 34 36 lob_index ~ PC1 ME_landings
512 0.2661111 0.2445262 31561621 12.32854 0.0012808 1 -671.6812 1349.362 1354.113 3.386862e+16 34 36 lob_index ~ PC1 ME_landings
513 0.4264719 0.3917126 28320700 12.26930 0.0001038 2 -667.2433 1342.487 1348.821 2.646805e+16 33 36 lob_index ~ PC1 + PC2 ME_landings
knitr::kable(aicSel2)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.2701382 0.2486717 31474907 12.584164 0.0011591 1 -671.5822 1349.164 1353.915 3.368277e+16 34 36 lob_index ~ bot_temp ME_landings
512 0.3241904 0.2608332 31219130 5.116871 0.0052681 3 -670.1972 1350.394 1358.312 3.118829e+16 32 36 lob_index ~ bot_temp + MCCPC1 + FirstMode ME_landings
513 0.5127226 0.4831907 26104463 17.361617 0.0000071 2 -664.3098 1336.620 1342.954 2.248762e+16 33 36 lob_index ~ predators + SecondMode ME_landings

ME Trawl index

Correlation table

MEDMR_cors %>% 
  na.omit() %>% 
  knitr::kable()
stat_area Season corPC1 corPC2 corPC3
511 Fall 0.6324384 -0.1233081 -0.0104265
511 Spring 0.7341690 -0.1777751 0.0575549
512 Fall 0.5535123 0.0053441 -0.2352774
512 Spring 0.5912927 -0.1923456 -0.0312973
513 Fall 0.1510798 0.6992976 0.1467033
513 Spring 0.8464540 0.3206713 0.3111505

Scatter plot

xyPlot(MEDMR_cpue)

AIC lm model selection table

aicSel <- aicModel(MEDMR_cpue)
aicSel2 <- aicModel_indicators(MEDMR_cpue)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.4262685 0.4077610 17.28304 23.032241 0.0000382 1 -139.8343 285.6687 290.1582 9259.808 31 33 lob_index ~ PC1 ME-NH_trawl
512 0.4405715 0.4032762 24.15699 11.813077 0.0001645 2 -150.3433 308.6866 314.6726 17506.801 30 33 lob_index ~ PC1 + PC2 ME-NH_trawl
513 0.2477686 0.1976198 11.11594 4.940672 0.0139724 2 -124.7289 257.4578 263.4438 3706.923 30 33 lob_index ~ PC1 + PC2 ME-NH_trawl
knitr::kable(aicSel2)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.5743387 0.5135299 15.66389 9.444999 0.0000580 4 -134.9088 281.8176 290.7966 6870.011 28 33 lob_index ~ MCCPC1 + predators + FirstMode + SecondMode ME-NH_trawl
512 0.4680004 0.4129659 23.96005 8.503772 0.0003312 3 -149.5138 309.0276 316.5101 16648.440 29 33 lob_index ~ MCCPC1 + FirstMode + SecondMode ME-NH_trawl
513 0.2443062 0.1939266 11.14149 4.849309 0.0149688 2 -124.8047 257.6093 263.5954 3723.986 30 33 lob_index ~ MCCPC1 + FirstMode ME-NH_trawl

NEFSC trawl index

Correlation table

NEFSC_cors %>% 
  filter(name %in% c("NefscFQ2", "NefscFQ4", "NefscMQ2", "NefscMQ4")) %>% 
  na.omit() %>% 
  knitr::kable()
stat_area name corPC1 corPC2 corPC3
511 NefscFQ2 0.5832362 0.1028456 0.1441670
511 NefscMQ2 0.6113879 0.0909348 0.1291904
512 NefscFQ2 0.5425905 -0.0898670 -0.0344447
512 NefscMQ2 0.5671789 -0.1142050 -0.0021120
513 NefscFQ2 0.5252710 0.3482655 0.0382154
513 NefscMQ2 0.5585267 0.3173500 0.1560931

Scatter plot

xyPlot(NEFSCindex)

AIC lm model selection table

aicSel <- aicModel(NEFSCindex)
aicSel2 <- aicModel_indicators(NEFSCindex)
knitr::kable(aicSel)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.3401645 0.3207576 1.901116 17.52800 0.0001891 1 -73.18082 152.3616 157.1122 122.88424 34 36 lob_index ~ PC1 NefscFQ2
512 0.2944045 0.2736517 1.965933 14.18625 0.0006290 1 -74.38775 154.7755 159.5260 131.40635 34 36 lob_index ~ PC1 NefscFQ2
513 0.3971985 0.3606650 1.844423 10.87219 0.0002360 2 -71.55357 151.1071 157.4412 112.26254 33 36 lob_index ~ PC1 + PC2 NefscFQ2
511 0.3737951 0.3553773 1.240960 20.29533 0.0000746 1 -57.82479 121.6496 126.4001 52.35934 34 36 lob_index ~ PC1 NefscMQ2
512 0.3216919 0.3017417 1.291555 16.12472 0.0003097 1 -59.26342 124.5268 129.2774 56.71589 34 36 lob_index ~ PC1 NefscMQ2
513 0.4126631 0.3770669 1.219904 11.59290 0.0001537 2 -56.67137 121.3427 127.6768 49.10945 33 36 lob_index ~ PC1 + PC2 NefscMQ2
knitr::kable(aicSel2)
stat_area r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs model name
511 0.4059533 0.3293021 1.889121 5.296112 0.0022755 4 -71.29023 154.5805 164.0816 110.63209 31 36 lob_index ~ bot_temp + bot_sal + MCCPC1 + FirstMode NefscFQ2
512 0.3668151 0.2851138 1.950360 4.489711 0.0056088 4 -72.43871 156.8774 166.3785 117.92098 31 36 lob_index ~ bot_temp + bot_sal + MCCPC1 + FirstMode NefscFQ2
513 0.5495032 0.4744204 1.672305 7.318628 0.0001383 5 -66.31131 146.6226 157.7072 83.89813 30 36 lob_index ~ bot_temp + bot_sal + predators + FirstMode + SecondMode NefscFQ2
511 0.4329668 0.3598012 1.236694 5.917630 0.0011696 4 -56.03812 124.0762 133.5773 47.41177 31 36 lob_index ~ bot_temp + bot_sal + MCCPC1 + FirstMode NefscMQ2
512 0.3955147 0.3175166 1.276882 5.070825 0.0029147 4 -57.18939 126.3788 135.8799 50.54328 31 36 lob_index ~ bot_temp + bot_sal + MCCPC1 + FirstMode NefscMQ2
513 0.5698008 0.4981009 1.094997 7.947027 0.0000727 5 -51.06707 116.1341 127.2188 35.97057 30 36 lob_index ~ bot_temp + bot_sal + predators + FirstMode + SecondMode NefscMQ2

Cluster Analysis

GOMindex_ca <- MEDMR_cpue %>% 
  filter(Season == "Spring") %>% 
  select(Year, lob_index, name, stat_area)

NEFSCindex_ca <- NEFSCindex %>% 
  filter(Season == 2) %>% 
  select(Year, lob_index, name)

lobData <- bind_rows(ALSI, sublegal_cpue, ME_pounds, GOMindex_ca, NEFSCindex_ca) %>% 
  pivot_wider(names_from = c(name, stat_area),
              values_from = lob_index) %>% 
  na.omit() %>% 
  column_to_rownames("Year") 

# standardize variables
lobData <- scale(lobData) %>% 
  data.frame()

lobData_t <- data.table::transpose(lobData)

# get row and colnames in order
colnames(lobData_t) <- rownames(lobData)
rownames(lobData_t) <- colnames(lobData)



# determine number of clusters
wss <- (nrow(lobData)-1)*sum(apply(lobData,2,var)) # get sum of squares

for (i in 2:12) wss[i] <- sum(kmeans(lobData,
   centers=i)$withinss)

# look for break in plot like a scree plot
breakplot <- plot(1:12, wss, type="b", xlab="Number of Clusters",
  ylab="Within groups sum of squares")

breakinfo <- fpc::pamk(lobData)

# K-Means Cluster Analysis
fit <- kmeans(lobData, 2) # 3 cluster solution
# get cluster means
cluster_means <- aggregate(lobData,by=list(fit$cluster),FUN=mean)
# append cluster assignment
lobData_cluster <- data.frame(lobData, fit$cluster)

# Cluster Plot against 1st 2 principal components

# vary parameters for most readable graph
cluster::clusplot(lobData, fit$cluster, color=TRUE, shade=TRUE,
   labels=2, lines=0)

# Centroid Plot against 1st 2 discriminant functions
centroidPlot <- fpc::plotcluster(lobData, fit$cluster)

Variance

Variance shows the spread in the data. We are using this stat to see if these indicators and the response variables are becoming more variable with time.

p0 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, bot_temp_var)) +
  facet_wrap(~stat_area) +
  labs(y = "Bottom temp variance")

p1 <- yrFVCOM %>% 
  ggplot() +
  geom_line(aes(Year, sur_temp_var)) +
  facet_wrap(~stat_area) +
  labs(y = "SST variance (FVCOM)")

p2 <- yrOISST %>% 
  ggplot() +
  geom_line(aes(Year, temp_var)) +
  facet_wrap(~stat_area)  +
  labs(y = "SST variance (OISST)")
  

p3 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, bot_sal_var)) +
  facet_wrap(~stat_area) +
  labs(y = "Bottom salinity variance")

p4 <- yrSal %>% 
  ggplot() +
  geom_line(aes(Year, sur_sal_var)) +
  facet_wrap(~stat_area) +
  labs(y = "SSS variance")

p5 <- sublegal_vari %>% 
  ggplot() +
  geom_line(aes(Year, vari)) +
  facet_wrap(~stat_area) +
  labs("VTS variance")

p6 <- MEDMR_vari %>% 
  ggplot() +
  geom_line(aes(Year, vari, col = Season)) +
  facet_wrap(~stat_area) +
  labs("ME-NH lob biomass var")

p6_1 <- MEDMR_vari %>% 
  mutate(vari = if_else(vari > 1000, 500, vari)) %>% 
  ggplot() +
  geom_line(aes(Year, vari, col = Season)) +
  facet_wrap(~stat_area) +
  labs("ME-NH lob biomass var")

p7 <- NEFSC_meIndex %>% 
  filter(Year >= 1980) %>% 
  ggplot() +
  geom_line(aes(Year, bio_var, col = season)) +
  facet_wrap(~stat_area) +
  labs("NEFSC lob biomass var")

Temperature

p0 / p1 / p2

Salinity

p3 / p4

Sublegal Lobsters

p5

ME-NH trawl survey

p6

p6_1

Nefsc trawl survey

p7